# Load data
reanalysis_res <- read.csv("data/reanalysis-results.csv")
rd_papers <- read.csv("data/cl-rd-papers-meta.csv")
reanalysis_res <- merge(
  reanalysis_res,
  subset(rd_papers, data_avail, c("estimate_code", "tstat", "main_estimate", "election_data", "weight")),
  by = "estimate_code"
)

# Functions to make and format power tables

run_power_analysis <- function(power_res, weights) {
  sapply(
    c("p60" = 0.6, "p80" = 0.8, "p95" = 0.95),
    function(threshold) {
      weighted.mean(power_res >= threshold, weights, na.rm = TRUE)
    }
  )
}

all_power_analyses <- function(res_data) {
  with(res_data, rbind(
    run_power_analysis(power_0p1, weight),
    run_power_analysis(power_0p2, weight),
    run_power_analysis(power_0p5, weight),
    run_power_analysis(power_0p8, weight)
  ))
}

format_power_table <- function(power_table, digits = 2) {
  power_table <- format(power_table, digits = digits)
  power_table <- rbind(c("60\\%", "80\\%", "95\\%"), power_table)
  power_table <- cbind(c("Effect size", "0.1", "0.2", "0.5", "0.8"), power_table)
  power_table <- apply(power_table, 1, paste0, collapse = " & ")
  power_table <- paste0(power_table, " \\\\")
  power_table <- c(
    "\\toprule",
    "& \\multicolumn{3}{c}{Power} \\\\ \\cmidrule{2-4}",
    power_table[1],
    "\\midrule",
    power_table[2:5],
    "\\bottomrule"
  )
  paste0(power_table, collapse = "\n")
}

make_power_table <- function(res_data, digits = 2) {
  format_power_table(all_power_analyses(res_data), digits)
}


# Table 2: Proportion of estimates achieving 60%, 80% and 95% power under different effect sizes
power_table <- make_power_table(reanalysis_res)
cat(power_table, file = "output/tables/table2-power-results.tex")

# Table 3a: Power election data
power_table <- make_power_table(subset(reanalysis_res, election_data), 1)
cat(power_table, file = "output/tables/table3a-election-power-results.tex")

# Table 3b: Power non-election data
power_table <- make_power_table(subset(reanalysis_res, !election_data))
cat(power_table, file = "output/tables/table3b-nonelection-power-results.tex")


#  Tables S2 & S3

articles <- subset(
  rd_papers,
  !duplicated(article_code),
  c("article_code", "journal", "other_est_type", "bw_select_strat", "year")
)

get_count_by_year <- function(articles) {
  outp <- rep(0L, 10)
  names(outp) <- 2009:2018
  tmp <- sapply(split(articles$year, articles$year), length)
  outp[names(tmp)] <- tmp
  outp
}

make_bold <- function(x) paste0("\\textbf{", x, "}")

make_year_table <- function(init_table) {
  init_table <- cbind(init_table, "total" = rowSums(init_table))
  init_table[] <- as.character(init_table[])
  init_table[1, ncol(init_table)] <- "Total"
  init_table <- cbind(row.names(init_table), init_table)
  init_table[, ncol(init_table)] <- sapply(init_table[, ncol(init_table)], make_bold)
  init_table[, ncol(init_table)] <- paste0(init_table[, ncol(init_table)], " \\\\")
  init_table[nrow(init_table), 1:(ncol(init_table) - 1)] <-
    sapply(init_table[nrow(init_table), 1:(ncol(init_table) - 1)], make_bold)
  init_table <- apply(init_table, 1, paste0, collapse = " & ")
  init_table <- c(
    "\\toprule",
    init_table[1],
    "\\midrule",
    init_table[2:(length(init_table) - 1)],
    "\\midrule",
    init_table[length(init_table)],
    "\\bottomrule"
  )
  paste0(init_table, collapse = "\n")
}


# Table S2: Yearly Published RD Articles

by_journal_table <- rbind(
  "Journal" = 2009:2018,
  "APSR" = get_count_by_year(subset(articles, journal == "apsr")),
  "AJPS" = get_count_by_year(subset(articles, journal == "ajps")),
  "JOP" = get_count_by_year(subset(articles, journal == "jop")),
  "All" = get_count_by_year(articles)
)

cat(make_year_table(by_journal_table), file = "output/tables/tableS2-journal-count.tex")


# Table S3: Bandwidth Selection in Sample of RD Articles

automated_bw <- articles$bw_select_strat %in% c("cct mse optimal", "ik mse optimal", "cross validation method")
by_method_table <- rbind(
  "Method" = 2009:2018,
  "Automated (e.g. CCT and IK)" = get_count_by_year(subset(articles, automated_bw)),
  "Non-automated" = get_count_by_year(subset(articles, !automated_bw & is.na(other_est_type))),
  "Global Polynomial" = get_count_by_year(subset(articles, other_est_type == "global polynomial")),
  "All" = get_count_by_year(articles)
)

# Add Erikson et al. (2015)
by_method_table["Non-automated", "2015"] <- by_method_table["Non-automated", "2015"] + 1
by_method_table["All", "2015"] <- by_method_table["All", "2015"] + 1

cat(make_year_table(by_method_table), file = "output/tables/tableS3-method-count.tex")


# Table S6: Distributions of t-stats for RDrobust and RDhonest

get_tstat_stats <- function(ests) {
  c(
    mean = mean(ests), sd = sd(ests),
    quantile(ests, probs = c(.05, .10, .25, .50, .75, .90, .95))
  )
}

tstats_subsamples <- list(
  "Original ests. (all papers)" = subset(rd_papers, main_estimate)$tstat,
  "Original (rdrobust studies)" = subset(reanalysis_res, main_estimate)$tstat,
  "Reanalysis results (rdrobust studies)" = subset(reanalysis_res, main_estimate)$cct_robust_tstat,
  "Original (RDHonest studies)" = subset(reanalysis_res, main_estimate & !is.na(rdhonest_coef))$tstat,
  "Reanalysis results (RDHonest studies)" = subset(reanalysis_res, main_estimate & !is.na(rdhonest_coef))$rdhonest_tstat
)

tstat_table <- do.call(rbind, lapply(tstats_subsamples, get_tstat_stats))

tstat_table <- format(tstat_table, digits = 2)
tstat_table <- cbind(rownames(tstat_table), tstat_table)
tstat_table <- rbind(c("Sample", "Mean", "S.D.", "0.05", "0.10", "0.25", "0.50", "0.75", "0.90", "0.95"), tstat_table)
tstat_table <- apply(tstat_table, 1, paste0, collapse = " & ")
tstat_table <- paste0(tstat_table, " \\\\")
tstat_table <- c(
  "\\toprule",
  "& & & \\multicolumn{7}{c}{Quantiles} \\\\ \\cmidrule{4-10}",
  tstat_table[1],
  "\\midrule",
  tstat_table[2:6],
  "\\bottomrule"
)
tstat_table <- paste0(tstat_table, collapse = "\n")

cat(tstat_table, file = "output/tables/tableS6-t-stat-dists.tex")
